import pandas as pd
import nltk
from nltk.sentiment import SentimentIntensityAnalyzer

# Make sure the VADER lexicon is available. It is downloaded only when it
# cannot be found locally, so repeated runs do not contact the NLTK server.
try:
    nltk.data.find('sentiment/vader_lexicon.zip')
except LookupError:
    nltk.download('vader_lexicon')

file_path = 'News Titles with Emotions.xlsx'   # <- change this to your input file path
output_path = 'News Titles with Emotions_vader.xlsx'   # results are written here

# Initialise the VADER analyzer once; it is reused for every title.
analyzer = SentimentIntensityAnalyzer()

def read_titles_from_excel(df):
    """Extract the titles from a loaded table as a list of strings.

    The first column whose name equals ``'title'`` (ignoring case and
    surrounding whitespace) is used; when no such column exists, the first
    column of the frame is used instead.

    Missing cells are dropped instead of being converted to the literal
    strings ``'nan'`` / ``'None'``, which would otherwise be treated as
    titles by the caller.

    Args:
        df: A pandas DataFrame holding the input data.

    Returns:
        A list of ``str`` with one entry per non-missing cell of the chosen
        column, in row order. Empty when the frame has no columns.
    """
    if df.shape[1] == 0:
        # No columns at all: ``df.iloc[:, 0]`` would raise IndexError.
        return []

    # Select by position so a duplicated "title" label still yields a single
    # column (label-based selection would return a DataFrame in that case).
    chosen = 0
    for position, col in enumerate(df.columns):
        if str(col).strip().lower() == 'title':
            chosen = position
            break

    return df.iloc[:, chosen].dropna().astype(str).tolist()

# Load the titles, choosing the reader from the file extension. Text-based
# formats (csv/txt) are decoded by trying several common encodings in order.
# 'utf-8-sig' decodes plain UTF-8 as well as UTF-8 with a byte-order mark, so
# a BOM never ends up glued to the first title. 'latin1' can decode any byte
# sequence and therefore acts as the last resort.
encodings_to_try = ['utf-8-sig', 'cp1252', 'gbk', 'latin1']

if file_path.lower().endswith(('.xls', '.xlsx')):
    df_in = pd.read_excel(file_path)
    titles = read_titles_from_excel(df_in)

elif file_path.lower().endswith('.csv'):
    for enc in encodings_to_try:
        try:
            df_in = pd.read_csv(file_path, encoding=enc)
        except UnicodeDecodeError as e:
            # Only a decoding failure justifies trying the next encoding;
            # other errors (missing file, malformed CSV) propagate at once.
            last_exc = e
        else:
            titles = read_titles_from_excel(df_in)
            break
    else:
        # Every candidate encoding failed: re-raise the most recent error.
        raise last_exc

elif file_path.lower().endswith('.txt'):
    # Plain text: one title per line, blank lines are skipped.
    for enc in encodings_to_try:
        try:
            with open(file_path, 'r', encoding=enc) as f:
                titles = [line.strip() for line in f if line.strip()]
        except UnicodeDecodeError as e:
            last_exc = e
        else:
            break
    else:
        # Every candidate encoding failed: re-raise the most recent error.
        raise last_exc

else:
    raise ValueError("不支持的文件类型，请提供 .xlsx/.xls/.csv/.txt 文件。")

# Score every title with VADER, keeping each title next to its score dict.
scored_titles = [(item, analyzer.polarity_scores(str(item))) for item in titles]

# One output column per VADER score, aligned with the titles row by row.
data = {
    "Title": [item for item, _ in scored_titles],
    "Positive": [result["pos"] for _, result in scored_titles],
    "Neutral": [result["neu"] for _, result in scored_titles],
    "Negative": [result["neg"] for _, result in scored_titles],
    "Compound": [result["compound"] for _, result in scored_titles],
}

# Write the table to the output workbook without the index column.
df_out = pd.DataFrame(data)
df_out.to_excel(output_path, index=False)
print(f"VADER情感分析结果已保存到 {output_path}")
